# Information Visualisation Project

# Imports 
import pandas as pd
import numpy as np 
import plotly.express as px
import plotly.graph_objects as go
import statistics as st
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr

import matplotlib.patches as mpatches

# Aggregating the data

# Load the Spotify songs table (a comma is read_csv's default separator)
data = pd.read_csv('spotify_songs.csv')

# Keep a separate frame with only the tracks whose popularity exceeds 90.
# NOTE(review): this snapshot is taken before the date conversion and column
# drop applied to `data` further down, so it presumably keeps the raw
# columns and string dates - confirm that is intended.
data_popular = data.loc[data['track_popularity'].gt(90)]

def usable_date(dataframe):
    """Pad a partial release date out to a full ``YYYY-MM-DD`` string.

    Despite its name, ``dataframe`` is a single cell value: the function is
    applied element-wise to the ``track_album_release_date`` column.

    Args:
        dataframe: One release-date value. Strings are expected to look like
            ``'YYYY'``, ``'YYYY-MM'`` or ``'YYYY-MM-DD'``.

    Returns:
        The string with any missing month and/or day filled in as ``01``.
        Strings that already contain two or more dashes are returned as-is.
        Non-string values (e.g. NaN for a missing date) are returned
        unchanged instead of raising ``AttributeError``.
    """
    # Missing cells arrive as float NaN / None and have no ``.count``.
    if not isinstance(dataframe, str):
        return dataframe

    dash_count = dataframe.count('-')
    if dash_count == 0:
        # Year only: default to 1 January.
        return dataframe + '-01-01'
    if dash_count == 1:
        # Year and month only: default to the first of the month, otherwise
        # a strict '%Y-%m-%d' parse would reject the value.
        return dataframe + '-01'
    return dataframe

# Normalise the release dates to full 'YYYY-MM-DD' strings, then parse them;
# anything that still fails the strict format becomes NaT (errors='coerce').
data['track_album_release_date'] = pd.to_datetime(
    data['track_album_release_date'].apply(usable_date),
    format='%Y-%m-%d',
    errors='coerce',
)

# Drop the columns that are not used, to keep the frame smaller
data = data.drop(
    columns=[
        'language',
        'track_id',
        'lyrics',
        'track_album_id',
        'track_album_name',
        'playlist_name',
        'playlist_id',
        'duration_ms',
    ]
)